# -------------------------------------------------------------------
#       -*- coding: utf-8 -*-
#   @Project    :   spider
#   @File       :   25_猪八戒网_xpath.py
#   @Author     :   WANGYU
#   @Time       :   2021-08-18 11:04:25
#   @Software   :   PyCharm
#   @Desc       :   
# -------------------------------------------------------------------


# Step 1: fetch the page source
# Step 2: parse it and extract the data

from urllib.parse import quote

import requests
from lxml import html


# Helper that downloads a page and returns its HTML source
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection errors, and ``Timeout`` when the
            server does not answer within ``timeout`` seconds).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
                      ' (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }
    # requests has no default timeout: without one, an unresponsive server
    # would block this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode the body as UTF-8 regardless of the charset requests guessed.
    response.encoding = 'utf-8'
    return response.text


# text='''xxx'''
# etree = html.etree
# htmlDiv = etree.HTML(text)  # builds an XPath-capable parse tree and auto-repairs the HTML text
# title = htmlDiv.xpath("//meta[1]/@content")
# print(title)
# Ask the user for a keyword and build the search URL.
search = input('输入需要查询的内容：')
# Percent-encode the keyword so that characters such as '&', '#', '+' or '%'
# cannot split, truncate or otherwise corrupt the ``kw`` query parameter.
url = f'https://qinghai.zbj.com/search/f/?kw={quote(search, safe="")}'

# Parse the downloaded page into an element tree that can be queried with XPath.
html_content = html.etree.HTML(get_html(url))

# Select the <div> of every service provider in the result list.
# NOTE(review): this absolute path depends on the exact page layout; if the
# site changes its markup the result will simply be an empty list.
divs = html_content.xpath('/html/body/div[6]/div/div/div[2]/div[5]/div/div')

for div in divs:
    name_nodes = div.xpath('./div/div/a[1]/div[1]/p/text()')
    price_nodes = div.xpath('./div/div/a[2]/div[2]/div[1]/span[1]/text()')

    # A card that lacks the expected name/price nodes is skipped instead of
    # aborting the whole listing with an IndexError.
    if len(name_nodes) < 2 or not price_nodes:
        continue

    # The company name is read from the second text node of the <p>
    # (presumably the first one is not the name — verify against the page).
    company = name_nodes[1]
    # Drop the currency sign surrounding the amount.
    price = price_nodes[0].strip('¥')

    print(company, price)
